# Load libraries
library(RSocrata)
library(readr)
library(rgdal)
library(dplyr)
library(ggplot2)
library(mapproj)
library(RColorBrewer)
library(graphZoo)
# Custom functions
# Capitalize the first letter of every space-separated word.
#
# s:      character vector; its names, if any, are kept on the result.
# strict: if TRUE the remainder of each word is lower-cased; if FALSE (the
#         default) it is left untouched.
#
# Returns a character vector of the same length as `s`. Missing values stay
# missing and a zero-length input yields character(0).
capwords <- function(s, strict = FALSE) {
  # Capitalize the words of one already-split string and glue them back.
  cap <- function(words) {
    rest <- substring(words, 2)
    if (strict) {
      rest <- tolower(rest)
    }
    paste0(toupper(substring(words, 1, 1)), rest, collapse = " ")
  }
  # vapply() pins the result type; sapply() would return list() for an empty
  # input.
  out <- vapply(strsplit(s, split = " "), cap, character(1),
                USE.NAMES = !is.null(names(s)))
  # paste0() turns a missing value into the literal text "NANA"; restore NA.
  out[is.na(s)] <- NA_character_
  out
}
# Color-blind friendly palette: the eight colors of the "Dark2" scheme,
# shared by all figures below.
cbf <- brewer.pal(n = 8, name = "Dark2")
# Load collision data
# The 2014 Manhattan collisions are downloaded once and cached on disk. The
# data are always read back from the cached CSV so that `collisions` has the
# same column types on the first run as on every later run.
collisions_file <- "../data/collisions2014.csv"
if (!file.exists(collisions_file)) {
  # write_csv() does not create missing directories.
  dir.create(dirname(collisions_file), showWarnings = FALSE, recursive = TRUE)
  write_csv(
    read.socrata("https://data.cityofnewyork.us/resource/h9gi-nx95.csv?borough=MANHATTAN&$where=date >= '2014-01-01' AND date <= '2014-12-31'"),
    collisions_file
  )
}
collisions <- read_csv(collisions_file)
## Warning: 663 problems parsing '../data/collisions2014.csv'. See
## problems(...) for more details.
# Load map data
# Fetch and unpack the borough-boundaries shapefile on first use, then read it
# and reproject it to WGS84 longitude/latitude.
map_dir <- "../map"
if (!file.exists(file.path(map_dir, "nybb.shp"))) {
  # download.file() fails when the destination directory is missing.
  dir.create(map_dir, showWarnings = FALSE, recursive = TRUE)
  map_zip <- file.path(map_dir, "nybb_15a.zip")
  # Binary mode keeps the archive byte-exact on every platform.
  download.file("http://www.nyc.gov/html/dcp/download/bytes/nybb_15a.zip",
                map_zip, mode = "wb")
  # junkpaths = TRUE ignores the folders stored inside the archive, so the
  # shapefile components land directly in `map_dir`.
  unzip(map_zip, exdir = map_dir, junkpaths = TRUE)
}
nyc_map <- spTransform(
  readOGR(file.path(map_dir, "nybb.shp"), layer = "nybb", verbose = FALSE),
  CRS("+proj=longlat +datum=WGS84")
)
# Figure 1: collisions per calendar day, grouped by day of the week.

# One row per calendar day: its weekday and its number of collisions.
daily1 <- collisions %>%
  mutate(DATE = as.Date(DATE)) %>%
  # "%u" is the ISO weekday number (1 = Monday ... 7 = Sunday). Unlike
  # weekdays(), it does not depend on the session locale, so the English
  # labels below are always matched correctly.
  mutate(DAY = factor(as.integer(format(DATE, "%u")),
                      levels = 1:7,
                      labels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"))) %>%
  group_by(DAY, DATE) %>%
  summarize(n = n())

# Mean daily count per weekday. The constant `group` column lets geom_line()
# connect the seven means across the discrete x axis.
daily2 <- daily1 %>%
  group_by(DAY) %>%
  summarize(mean = mean(n)) %>%
  mutate(group = 1)

g <- ggplot() +
  # Individual days, jittered horizontally and colored by weekday.
  geom_point(data = daily1,
             aes(x = DAY, y = n, color = DAY),
             position = position_jitter(width = .25)) +
  # Weekday means, drawn larger and joined by a line.
  geom_point(data = daily2,
             aes(x = DAY, y = mean, group = group),
             size = 5) +
  geom_line(data = daily2,
            aes(x = DAY, y = mean, group = group)) +
  # "none" replaces the deprecated `color = FALSE` way of hiding a legend.
  theme_graphzoo(base_size = 15) + guides(color = "none") +
  scale_color_manual(values = cbf[1:7]) +
  xlab(NULL) + ylab(NULL)
g <- addTitle(g, "Manhattan's daily vehicular collisions rate (Jan-Dec 2014)", n.lines = 2)
g <- addBanner(g, font.size = 4, heights = c(1, 0.05 * 8 / 6),
               l.txt = "GRAPHZOO.TUMBLR.COM",
               r.txt = "SOURCE: DATA.CITYOFNEWYORK.US")
g
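Figure 1: Vehicular collisions per day in Manhattan by day of the week (Jan-Dec 2014). Each small point is one calendar day; the large points and line show the mean for each weekday.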
# Figure 2: collisions per hour of the day.

# One row per (hour, date) pair that has at least one collision. The hour is
# what remains of TIME once every ":<digits>" part is stripped.
hourly1 <- collisions %>%
  mutate(HOUR = factor(as.numeric(gsub(":[0-9]+", "", TIME)))) %>%
  group_by(HOUR, DATE) %>%
  summarize(n = n())

# Mean count per hour. Hour/date pairs without any collision are absent from
# `hourly1`, so each mean is conditional on at least one collision occurring.
# The constant `group` column lets geom_line() connect the hourly means.
hourly2 <- hourly1 %>%
  group_by(HOUR) %>%
  summarize(mean = mean(n)) %>%
  mutate(group = 1)

g <- ggplot() +
  # Individual hour/date counts, jittered and colored by their count.
  geom_point(data = hourly1,
             aes(x = HOUR, y = n, color = n),
             position = position_jitter(width = .25),
             alpha = 0.25) +
  # Hourly means, drawn larger and joined by a line.
  geom_point(data = hourly2,
             aes(x = HOUR, y = mean, group = group),
             size = 5) +
  geom_line(data = hourly2,
            aes(x = HOUR, y = mean, group = group)) +
  # "none" replaces the deprecated `color = FALSE` way of hiding a legend.
  theme_graphzoo(base_size = 15) + guides(color = "none") +
  scale_color_gradient2(low = cbf[3], mid = cbf[2], high = cbf[6], midpoint = 12.5) +
  xlab(NULL) + ylab(NULL) +
  # Label every second hour as "H:00".
  scale_x_discrete(breaks = seq(0, 23, 2), labels = paste0(seq(0, 23, 2), ":00"))
g <- addTitle(g, "Manhattan's hourly vehicular collision rate (Jan-Dec 2014)", n.lines = 2)
g <- addBanner(g, font.size = 4, heights = c(1, 0.05 * 8 / 6),
               l.txt = "GRAPHZOO.TUMBLR.COM",
               r.txt = "SOURCE: DATA.CITYOFNEWYORK.US")
g
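Figure 2: Vehicular collisions per hour of the day in Manhattan (Jan-Dec 2014). Each small point is one hour on one date; the large points and line show the mean for each hour.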
# Collisions per intersection, most frequent first. The two street names are
# always joined in the same (descending) order so that "A - B" and "B - A"
# count as one crossing.

# Mean of the available values; NA when every value is missing, so that one
# record without coordinates no longer erases the whole intersection.
mean_or_na <- function(x) {
  if (all(is.na(x))) NA_real_ else mean(x, na.rm = TRUE)
}

cross <- collisions %>%
  # ifelse() and paste0() are vectorized, so no rowwise() pass is needed (it
  # was slow and made group_by() warn about stripping the rowwise nature).
  mutate(cross = ifelse(ON.STREET.NAME > CROSS.STREET.NAME,
                        paste0(ON.STREET.NAME, " - ", CROSS.STREET.NAME),
                        paste0(CROSS.STREET.NAME, " - ", ON.STREET.NAME))) %>%
  group_by(cross) %>%
  summarize(n = n(),
            lat = mean_or_na(LATITUDE),
            long = mean_or_na(LONGITUDE)) %>%
  # Drop records where both street names are empty; rows whose label is NA
  # are dropped by filter() as well.
  filter(cross != " - ") %>%
  arrange(desc(n)) %>%
  mutate(cross = capwords(tolower(cross)))
# Horizontal bar chart of the 50 intersections with the most collisions.
# `cross` is already sorted by decreasing count; head() never pads the result
# with all-NA rows when fewer than 50 intersections exist, unlike `[1:50, ]`.
top_cross <- head(cross, 50)
g <- ggplot(top_cross, aes(x = reorder(factor(cross), n), y = n)) +
  geom_bar(stat = "identity", color = "white", fill = cbf[2]) +
  # Flip so the intersection names read horizontally along the y axis.
  coord_flip() +
  # "none" replaces the deprecated `color = FALSE` way of hiding a legend.
  theme_graphzoo(base_size = 14) + guides(color = "none") +
  theme(axis.text.y = element_text(size = 8)) +
  xlab(NULL) + ylab("Number of vehicular collisions")
g <- addTitle(g, "Manhattan's 50 most dangerous crossings (Jan-Dec 2014)", n.lines = 1, font.size = 15)
g <- addBanner(g, font.size = 3, heights = c(1, 0.05 * 6 / 9),
               l.txt = "GRAPHZOO.TUMBLR.COM",
               r.txt = "SOURCE: DATA.CITYOFNEWYORK.US")
# Inset map: outline of one borough polygon with the 50 most frequent
# intersections overlaid, drawn on top of the bar chart `g`.
# NOTE(review): polygon id "2" is presumably Manhattan - confirm against the
# shapefile's attribute table.
m <- ggplot() +
  geom_polygon(data = filter(fortify(nyc_map), id == "2"),
               aes(x = long, y = lat, group = group),
               color = "grey50", size = 1, fill = "grey50") +
  # head() never pads the result with all-NA rows when fewer than 50
  # intersections exist, unlike `cross[1:50, ]`.
  geom_point(data = head(cross, 50), aes(x = long, y = lat),
             color = "white", alpha = 0.5) +
  coord_map(orientation = c(68.5, 0, 50)) +
  theme_graphzoo() +
  # Transparent background and no axes or grid, so only the map shows.
  theme(plot.background = element_rect(fill = rgb(0, 0, 0, 0), color = rgb(0, 0, 0, 0)),
        axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.margin = unit(c(-.02, -.13, -0.06, -.2), "snpc"))
## Regions defined for each Polygons
# Draw the bar chart first, then the map inside a viewport placed over it.
vp <- viewport(width = 0.6, height = 0.6, x = 0.86, y = 0.41)
print(g)
print(m, vp = vp)
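Figure 3: The 50 Manhattan intersections with the most vehicular collisions (Jan-Dec 2014), with an inset map showing their locations.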
# Project and rotate the borough outline and every intersection by hand, so
# that both layers can later be drawn in plain Cartesian coordinates.
# The projection is named explicitly: with the default (empty) projection,
# mapproject() reuses whatever projection was used last in the session, which
# here would be the one left behind by an earlier coord_map() render.
# NOTE(review): "mercator" is coord_map()'s default projection - confirm this
# matches the intended look.
map_orientation <- c(68.5, 0, 50)
map <- filter(fortify(nyc_map), id == "2")
## Regions defined for each Polygons
tmp <- mapproject(map$long, map$lat,
                  projection = "mercator", orientation = map_orientation)
map <- mutate(map, long_rot = tmp$x, lat_rot = tmp$y)
tmp <- mapproject(cross$long, cross$lat,
                  projection = "mercator", orientation = map_orientation)
cross <- mutate(cross, long_rot = tmp$x, lat_rot = tmp$y)
# Map of every intersection on the pre-projected borough outline; point size
# and color both encode the number of collisions.
g <- ggplot() +
  geom_polygon(data = map,
               aes(x = long_rot, y = lat_rot, group = group),
               color = "gray15", size = 1, fill = "gray15") +
  geom_point(data = cross,
             aes(x = long_rot, y = lat_rot, color = n, fill = n, size = n),
             alpha = 0.5) +
  # The coordinates are already projected, so only the aspect ratio is fixed.
  coord_fixed() +
  # Color, fill and size share the same breaks and labels.
  scale_color_gradient("", low = "red", high = "yellow",
                       breaks = c(25, 50, 75, 100, 125),
                       labels = c(25, 50, 75, 100, 125)) +
  scale_fill_gradient("", low = "red", high = "yellow",
                      breaks = c(25, 50, 75, 100, 125),
                      labels = c(25, 50, 75, 100, 125)) +
  scale_size_continuous("", breaks = c(25, 50, 75, 100, 125),
                        labels = c(25, 50, 75, 100, 125)) +
  # Show the fill legend on one row and hide the color legend; "none"
  # replaces the deprecated `color = FALSE`.
  guides(fill = guide_legend(nrow = 1, override.aes = list(shape = 21, alpha = 0.75)),
         color = "none") +
  theme_graphzoo(base_size = 6) +
  # Transparent background and no axes or grid, so the map can be overlaid on
  # another plot.
  theme(plot.background = element_rect(fill = rgb(0, 0, 0, 0), color = rgb(0, 0, 0, 0)),
        legend.justification = c(1, 1), legend.position = c(1.6, 0.9),
        legend.key.width = unit(1, "line"),
        axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.margin = unit(c(0, 0, -0.05, 0), "snpc"))
# Background panel for the final figure: an empty unit-square plot carrying
# only the headline, the sub-headline and the banner. The map `g` is printed
# on top of it through a viewport.
panel_corners <- data.frame(x = c(0, 1), y = c(0, 1))
headline <- "A year of accidents"
subheadline <- "Locations and number of vehicular collisions\nManhattan, NYC (Jan-Dec 2014)"

# Theme overrides that remove every axis and grid element.
bare_panel <- theme(
  axis.line = element_blank(),
  axis.text = element_blank(),
  axis.ticks = element_blank(),
  axis.title = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  plot.margin = unit(c(0, 0, -0.05, 0), "snpc")
)

b <- ggplot(panel_corners, aes(x = x, y = y)) +
  geom_blank() +
  # Both text blocks are right-aligned at x = 1.
  annotate("text", x = 1, y = 0.99, label = headline,
           hjust = 1, family = "Avenir Next", size = 5.5) +
  annotate("text", x = 1, y = 0.95, label = subheadline,
           hjust = 1, family = "Avenir Next", size = 2.25) +
  theme_graphzoo() +
  bare_panel
b <- addBanner(
  b,
  font.size = 2.5,
  heights = c(1, 0.05 * 4 / 8),
  l.txt = "GRAPHZOO.TUMBLR.COM",
  r.txt = "SOURCE: DATA.CITYOFNEWYORK.US"
)

# Print the panel first, then the map inside a full-size viewport whose
# center is shifted to (0.45, 0.52).
vp <- viewport(width = 1, height = 1, x = 0.45, y = 0.52)
print(b)
print(g, vp = vp)
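Figure 4: Locations and number of vehicular collisions at Manhattan intersections (Jan-Dec 2014). Point size and color both scale with the number of collisions.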